# Setup: clear the workspace, load packages, and read the base panel that the
# rest of the script adds columns to.
# Remove every (non-hidden) object from the current environment first.
rm(list = ls())
library(countrycode)
library(foreign)
library(readstata13)

# Relative paths used below are resolved against this directory.
# NOTE(review): machine-specific path; adjust to the local replication folder.
setwd("~/Desktop/QJPS Replication")

# Base country-year panel (presumably ideal points, going by the file name).
full_data <- read.csv("all_countries_ideal_points_panel.csv")
# Merge key of the form "<cow>-<year>", used by every match() further down.
full_data$nameyear <- paste(full_data$cow, full_data$year, sep = "-")
# Region label looked up from the COW numeric code ("cown") via countrycode().
full_data$region <- countrycode(full_data$cow, "cown", "region")

############################################################

# Document length
# Read the Stata file and keep only the state-years whose `preamble` entry is
# non-missing (rows without it were not collected completely).
ccp <- read.dta13("data/ccpcnc_v2_small.dta")
has_preamble <- !is.na(ccp$preamble)
ccp <- ccp[has_preamble, ]

# "<cowcode>-<year>" key for every remaining row, in the same format as
# full_data$nameyear.
ccp_key <- paste(ccp$cowcode, ccp$year, sep = "-")

# Position of each panel row in the filtered file (NA when there is no match);
# the table shows how many panel rows were matched / unmatched.
row_in_ccp <- match(full_data$nameyear, ccp_key)
table(is.na(row_in_ccp))

# Copy `length` across; unmatched panel rows receive NA.
full_data$length <- ccp$length[row_in_ccp]

############################################################

# Number of features
# For every state-year in the roll-call matrix, count how many feature columns
# are coded (non-missing) and attach that count to full_data.
data <- read.csv("data/roll_call_matrix.csv")
# Keep only rows with a non-missing `preamble` entry.
data <- data[!is.na(data$preamble), ]
# "<cowcode>-<year>" key, the same format as full_data$nameyear.
data$match <- paste(data$cowcode, data$year, sep = "-")
# The keys double as row names; R rejects duplicated row names, so a repeated
# state-year stops the script here.
rownames(data) <- data$match

# Identifier / bookkeeping columns that are not features; names that are not
# present in the file are simply ignored.
id_cols <- c("X", "country", "year", "syst", "systid", "systyear",
             "evnt", "evntid", "evntyear", "evnttype")
data <- data[setdiff(names(data), id_cols)]

# Count non-missing values over the feature columns only. The two key columns
# are excluded by name rather than by subtracting a constant 2 from the row
# total, so the count stays correct even when `cowcode` itself is missing.
feature_cols <- setdiff(names(data), c("cowcode", "match"))
features <- rowSums(!is.na(data[feature_cols]))
names(features) <- data$match

# Look up each panel row by key; unmatched rows receive NA.
m <- match(full_data$nameyear, names(features))
table(is.na(m))
full_data$features <- features[m]

############################################################

# Last year observed in the panel for each country (COW code).
maxyear <- tapply(full_data$year, full_data$cow, max)
# Build the lookup table with typed columns. Passing the labels and the values
# through cbind() would produce a character matrix, so the year would end up
# stored as text (or as a factor on R < 4.0) instead of a number.
maxyear <- data.frame(cow = names(maxyear),
                      max = as.vector(maxyear),
                      stringsAsFactors = FALSE)

# match() compares the numeric codes with the character labels as strings.
m <- match(full_data$cow, maxyear$cow)
table(is.na(m))
# Every row of a country receives that country's last year; rows whose code is
# not found (e.g. a missing code) receive NA.
full_data$maxyear <- maxyear$max[m]

############################################################
##### Merge with QoG Econ Data #############################
############################################################

# Columns taken from the QoG file: the COW code and year used for matching,
# plus the covariates to merge.
# NOTE(review): gtm_unit is selected here but never copied into full_data
# below; confirm whether that is intentional.
qog_cols <- c("ccodecow", "year", "lp_legor",
              "pt_pres", "pt_maj", "gtm_parl", "gtm_pr", "gtm_unit",
              "iaep_ecdl", "iaep_eml", "iaep_es", "iaep_nee")
qog_panel <- read.csv("data/qog_std_ts_jan17.csv")
qog <- qog_panel[, qog_cols, drop = FALSE]

# Numeric copy of the COW code; rows whose code is missing or cannot be read
# as a number are dropped.
qog$cow <- as.numeric(as.character(qog$ccodecow))
qog <- qog[!is.na(qog$cow), ]
# NOTE(review): the key is built from the raw ccodecow column, not from the
# numeric copy created above.
qog$match <- paste(qog$ccodecow, qog$year, sep = "-")

# Position of each panel row in the QoG data (NA when there is no match).
m <- match(full_data$nameyear, qog$match)
table(is.na(m))

# Copy the covariates across in a fixed order so that the column order of
# full_data is unchanged; unmatched rows receive NA.
for (qog_var in c("pt_pres", "pt_maj", "lp_legor")) {
  full_data[[qog_var]] <- qog[[qog_var]][m]
}
# Dummy derived from lp_legor: 1 when it equals "1", 0 otherwise, NA when
# lp_legor is missing.
full_data$com_law <- ifelse(full_data$lp_legor == "1", 1, 0)
for (qog_var in c("gtm_parl", "gtm_pr",
                  "iaep_eml", "iaep_es", "iaep_ecdl", "iaep_nee")) {
  full_data[[qog_var]] <- qog[[qog_var]][m]
}





# Save the assembled panel as a Stata .dta file in the working directory.
write.dta(full_data, "Table_1.dta")
